Within the store, split the persistent information regarding a VM from the
authoremellor@ewan <emellor@ewan>
Fri, 30 Sep 2005 12:41:10 +0000 (13:41 +0100)
committeremellor@ewan <emellor@ewan>
Fri, 30 Sep 2005 12:41:10 +0000 (13:41 +0100)
transient information regarding a domain.  This allows live localhost migration,
which is important for testing migration, and also allows migration when we
have a distributed store.  The backend paths in the store now refer to the
frontend domain ID, not its UUID, and blktap has changed to match.

To support this split in the information, the cleanup procedure has been split
to match.

Change the save-restore interface between XendDomain, XendDomainInfo, and
XendCheckpoint, to remove some intermingling, in particular taking XendDomain
out of the loop for the restore procedure.

Improved the recovery procedure to avoid trying to destroy dom0 when recovering.

Added a lock around XendDomain.refresh and one around
XendDomainInfo.refreshShutdown, to improve the behaviour when recreating
domains at startup.  There are still races here, so more thought needs to be
given to the locking.

Live reconfiguration of an existing domain is temporarily broken.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
tools/blktap/xenbus.c
tools/python/xen/xend/XendCheckpoint.py
tools/python/xen/xend/XendDomain.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/server/DevController.py

index 45d931c45792b3ebfb2a9b69ea068eee4b65ba75..48b085c2fb3d77769bb5adae1e09d92c75efc08a 100644 (file)
@@ -116,25 +116,25 @@ int xs_exists(struct xs_handle *h, const char *path)
 
 
 /* This assumes that the domain name we are looking for is unique! */
-char *get_dom_uuid(struct xs_handle *h, const char *name)
+char *get_dom_domid(struct xs_handle *h, const char *name)
 {
-    char **e, *val, *uuid = NULL;
+    char **e, *val, *domid = NULL;
     int num, i, len;
     char *path;
 
-    e = xs_directory(h, "/domain", &num);
+    e = xs_directory(h, "/local/domain", &num);
 
     i=0;
     while (i < num) {
-        asprintf(&path, "/domain/%s/name", e[i]);
+        asprintf(&path, "/local/domain/%s/name", e[i]);
         val = xs_read(h, path, &len);
         free(path);
         if (val == NULL)
             continue;
         if (strcmp(val, name) == 0) {
             /* match! */
-            asprintf(&path, "/domain/%s/uuid", e[i]);
-            uuid = xs_read(h, path, &len);
+            asprintf(&path, "/local/domain/%s/domid", e[i]);
+            domid = xs_read(h, path, &len);
             free(val);
             free(path);
             break;
@@ -144,7 +144,7 @@ char *get_dom_uuid(struct xs_handle *h, const char *name)
     }
 
     free(e);
-    return uuid;
+    return domid;
 }
 
 static int strsep_len(const char *str, char c, unsigned int len)
@@ -553,15 +553,15 @@ static void blkback_probe(struct xs_handle *h, struct xenbus_watch *w,
 
 int add_blockdevice_probe_watch(struct xs_handle *h, const char *domname)
 {
-    char *uuid, *path;
+    char *domid, *path;
     struct xenbus_watch *vbd_watch;
     int er;
 
-    uuid = get_dom_uuid(h, domname);
+    domid = get_dom_domid(h, domname);
 
-    DPRINTF("%s: %s\n", domname, (uuid != NULL) ? uuid : "[ not found! ]");
+    DPRINTF("%s: %s\n", domname, (domid != NULL) ? domid : "[ not found! ]");
 
-    asprintf(&path, "/domain/%s/backend/vbd", uuid);
+    asprintf(&path, "/local/domain/%s/backend/vbd", domid);
     if (path == NULL) 
         return -ENOMEM;
 
index b0e73a97c9fa9fcd4d6cc2c715702fb56c8b3327..b0e1536a6a736407d2310e4684766dcbbae24041 100644 (file)
@@ -56,7 +56,7 @@ def save(xd, fd, dominfo, live):
     # simply uses the defaults compiled into libxenguest; see the comments 
     # and/or code in xc_linux_save() for more information. 
     cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
-           str(dominfo.domid), "0", "0", str(int(live)) ]
+           str(dominfo.getDomid()), "0", "0", str(int(live)) ]
     log.info("[xc_save] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
     
@@ -76,10 +76,10 @@ def save(xd, fd, dominfo, live):
             if fd == child.fromchild.fileno():
                 l = child.fromchild.readline()
                 if l.rstrip() == "suspend":
-                    log.info("suspending %d" % dominfo.domid)
-                    xd.domain_shutdown(dominfo.domid, reason='suspend')
+                    log.info("suspending %d" % dominfo.getDomid())
+                    xd.domain_shutdown(dominfo.getDomid(), reason='suspend')
                     dominfo.state_wait(XendDomainInfo.STATE_VM_SUSPENDED)
-                    log.info("suspend %d done" % dominfo.domid)
+                    log.info("suspend %d done" % dominfo.getDomid())
                     child.tochild.write("done\n")
                     child.tochild.flush()
         if filter(lambda (fd, event): event & select.POLLHUP, r):
@@ -90,11 +90,10 @@ def save(xd, fd, dominfo, live):
     if child.wait() != 0:
         raise XendError("xc_save failed: %s" % lasterr)
 
-    dominfo.closeStoreChannel()
-    xd.domain_destroy(dominfo.domid)
+    dominfo.destroy()
     return None
 
-def restore(xd, fd):
+def restore(fd):
     signature = read_exact(fd, len(SIGNATURE),
         "not a valid guest state file: signature read")
     if signature != SIGNATURE:
@@ -113,7 +112,7 @@ def restore(xd, fd):
         raise XendError("not a valid guest state file: config parse")
 
     vmconfig = p.get_val()
-    dominfo = xd.domain_configure(vmconfig)
+    dominfo = XendDomainInfo.restore(vmconfig)
 
     l = read_exact(fd, sizeof_unsigned_long,
                    "not a valid guest state file: pfn count read")
@@ -133,7 +132,7 @@ def restore(xd, fd):
         console_evtchn = 0
 
     cmd = [PATH_XC_RESTORE, str(xc.handle()), str(fd),
-           str(dominfo.domid), str(nr_pfns),
+           str(dominfo.getDomid()), str(nr_pfns),
            str(store_evtchn), str(console_evtchn)]
     log.info("[xc_restore] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
@@ -161,10 +160,10 @@ def restore(xd, fd):
                         if dominfo.store_channel:
                             dominfo.setStoreRef(int(m.group(2)))
                             if dominfo.store_mfn >= 0:
-                                IntroduceDomain(dominfo.domid,
+                                IntroduceDomain(dominfo.getDomid(),
                                                 dominfo.store_mfn,
                                                 dominfo.store_channel.port1,
-                                                dominfo.path)
+                                                dominfo.getDomainPath())
                     m = re.match(r"^(console-mfn) (\d+)\n$", l)
                     if m:
                         dominfo.setConsoleRef(int(m.group(2)))
index 6820bb33f08afded9019b5f8b62dde54d70af77b..110d5310700a37cc6c01a3e76eb1992bb3419880 100644 (file)
@@ -22,6 +22,7 @@
  Needs to be persistent for one uptime.
 """
 import os
+import threading
 
 import xen.lowlevel.xc
 
@@ -57,6 +58,9 @@ class XendDomain:
 
     """Dict of domain info indexed by domain id."""
     domains = None
+
+
+    ## public:
     
     def __init__(self):
         # Hack alert. Python does not support mutual imports, but XendDomainInfo
@@ -65,6 +69,7 @@ class XendDomain:
         # So we stuff the XendDomain instance (self) into xroot's components.
         xroot.add_component("xen.xend.XendDomain", self)
         self.domains = XendDomainDict()
+        self.refresh_lock = threading.Condition()
         self.watchReleaseDomain()
         self.refresh()
         self.dom0_setup()
@@ -94,6 +99,9 @@ class XendDomain:
         doms = self.list_sorted()
         return map(lambda x: x.getName(), doms)
 
+
+    ## private:
+
     def onReleaseDomain(self):
         self.refresh()
 
@@ -135,9 +143,6 @@ class XendDomain:
 
     def dom0_setup(self):
         dom0 = self.domain_lookup(PRIV_DOMAIN)
-        if not dom0:
-            dom0 = self.recreate_domain(self.xen_domain(PRIV_DOMAIN))
-        dom0.dom0_init_store()
         dom0.dom0_enforce_vcpus()
 
 
@@ -150,10 +155,10 @@ class XendDomain:
         if info.getDomid() in self.domains:
             notify = False
         self.domains[info.getDomid()] = info
-        info.exportToDB()
-        if notify:
-            eserver.inject('xend.domain.create', [info.getName(),
-                                                  info.getDomid()])
+        #info.exportToDB()
+        #if notify:
+        #    eserver.inject('xend.domain.create', [info.getName(),
+        #                                          info.getDomid()])
 
     def _delete_domain(self, domid, notify=True):
         """Remove a domain from the tables.
@@ -164,8 +169,8 @@ class XendDomain:
         info = self.domains.get(domid)
         if info:
             del self.domains[domid]
-            info.cleanup()
-            info.delete()
+            info.cleanupDomain()
+            info.cleanupVm()
             if notify:
                 eserver.inject('xend.domain.died', [info.getName(),
                                                     info.getDomid()])
@@ -174,25 +179,36 @@ class XendDomain:
     def refresh(self):
         """Refresh domain list from Xen.
         """
-        doms = self.xen_domains()
-        for d in self.domains.values():
-            info = doms.get(d.getDomid())
-            if info:
-                d.update(info)
-            else:
-                self._delete_domain(d.getDomid())
-        for d in doms:
-            if d not in self.domains:
-                try:
-                    self.recreate_domain(doms[d])
-                except:
-                    log.exception(
-                        "Failed to recreate information for domain %d.  "
-                        "Destroying it in the hope of recovery.", d)
+        self.refresh_lock.acquire()
+        try:
+            doms = self.xen_domains()
+            for d in self.domains.values():
+                info = doms.get(d.getDomid())
+                if info:
+                    d.update(info)
+                else:
+                    self._delete_domain(d.getDomid())
+            for d in doms:
+                if d not in self.domains and not doms[d]['dying']:
                     try:
-                        xc.domain_destroy(dom = d)
+                        self.recreate_domain(doms[d])
                     except:
-                        log.exception('Destruction of %d failed.', d)
+                        if d == PRIV_DOMAIN:
+                            log.exception(
+                                "Failed to recreate information for domain "
+                                "%d.  Doing nothing except crossing my "
+                                "fingers.", d)
+                        else:
+                            log.exception(
+                                "Failed to recreate information for domain "
+                                "%d.  Destroying it in the hope of "
+                                "recovery.", d)
+                            try:
+                                xc.domain_destroy(dom = d)
+                            except:
+                                log.exception('Destruction of %d failed.', d)
+        finally:
+            self.refresh_lock.release()
 
 
     def update_domain(self, id):
@@ -208,6 +224,9 @@ class XendDomain:
         else:
             self._delete_domain(id)
 
+
+    ## public:
+
     def domain_create(self, config):
         """Create a domain from a configuration.
 
@@ -219,19 +238,12 @@ class XendDomain:
         return dominfo
 
     def domain_configure(self, config):
-        """Configure an existing domain. This is intended for internal
-        use by domain restore and migrate.
+        """Configure an existing domain.
 
         @param vmconfig: vm configuration
         """
-        # We accept our configuration specified as ['config' [...]], which
-        # some tools or configuration files may be using.  For save-restore,
-        # we use the value of XendDomainInfo.sxpr() directly, which has no
-        # such item.
-        nested = sxp.child_value(config, 'config')
-        if nested:
-            config = nested
-        return XendDomainInfo.restore(config)
+        # !!!
+        raise XendError("Unsupported")
 
     def domain_restore(self, src):
         """Restore a domain from file.
@@ -241,7 +253,7 @@ class XendDomain:
 
         try:
             fd = os.open(src, os.O_RDONLY)
-            dominfo = XendCheckpoint.restore(self, fd)
+            dominfo = XendCheckpoint.restore(fd)
             self._add_domain(dominfo)
             return dominfo
         except OSError, ex:
index 6a3757693cf1defdaa45a7c8b0ebdeb66e1c2716..820bae27fa6b9f469713282d9b4bf5b62a5905dd 100644 (file)
@@ -94,8 +94,8 @@ SIF_TPM_BE_DOMAIN = (1<<7)
 SHUTDOWN_TIMEOUT = 30
 
 
-DOMROOT = '/domain'
-VMROOT  = '/domain'
+DOMROOT = '/local/domain/'
+VMROOT  = '/vm/'
 
 
 xc = xen.lowlevel.xc.new()
@@ -116,6 +116,31 @@ ROUNDTRIPPING_CONFIG_ENTRIES = [
     ]
 
 
+def restore(config):
+    """Create a domain and a VM object to do a restore.
+
+    @param config:    domain configuration
+    """
+
+    log.debug("XendDomainInfo.restore(%s)", config)
+
+    try:
+        uuid    =     sxp.child_value(config, 'uuid')
+        ssidref = int(sxp.child_value(config, 'ssidref'))
+    except TypeError, exn:
+        raise VmError('Invalid ssidref in config: %s' % exn)
+
+    vm = XendDomainInfo(uuid, XendDomainInfo.parseConfig(config),
+                        xc.domain_create(ssidref = ssidref))
+    vm.storeVmDetails()
+    vm.configure()
+    vm.create_channel()
+#         vm.exportToDB()
+#    vm.refreshShutdown()
+    vm.storeDomDetails()
+    return vm
+
+
 def domain_exists(name):
     # See comment in XendDomain constructor.
     xd = get_component('xen.xend.XendDomain')
@@ -161,7 +186,7 @@ class XendDomainInfo:
         @raise: VmError for invalid configuration
         """
 
-        log.debug("XendDomainInfo.create(...)")
+        log.debug("XendDomainInfo.create(%s)", config)
         
         vm = cls(getUuid(), cls.parseConfig(config))
         vm.construct()
@@ -172,10 +197,14 @@ class XendDomainInfo:
 
 
     def recreate(cls, xeninfo):
-        """Create the VM object for an existing domain."""
+        """Create the VM object for an existing domain.  The domain must not
+        be dying, as the paths in the store should already have been removed,
+        and asking us to recreate them causes problems."""
 
         log.debug("XendDomainInfo.recreate(%s)", xeninfo)
 
+        assert not xeninfo['dying']
+
         domid = xeninfo['dom']
         try:
             dompath = GetDomainPath(domid)
@@ -191,45 +220,29 @@ class XendDomainInfo:
                 raise XendError(
                     'No vm/uuid path in store for existing domain %d' % domid)
 
-        except Exception, exn:
-            log.warn(str(exn))
-            uuid = getUuid()
-
-        log.info("Recreating domain %d, uuid %s", domid, uuid)
-
-        vm = cls(uuid, xeninfo, domid, True)
-        vm.refreshShutdown(xeninfo)
-        return vm
-
-    recreate = classmethod(recreate)
+            log.info("Recreating domain %d, UUID %s.", domid, uuid)
 
+            vm = cls(uuid, xeninfo, domid, True)
 
-    def restore(cls, config, uuid = None):
-        """Create a domain and a VM object to do a restore.
-
-        @param config:    domain configuration
-        @param uuid:      uuid to use
-        """
-        
-        log.debug("XendDomainInfo.restore(%s, %s)", config, uuid)
+        except Exception, exn:
+            log.warn(str(exn))
 
-        if not uuid:
             uuid = getUuid()
 
-        try:
-            ssidref = int(sxp.child_value(config, 'ssidref'))
-        except TypeError, exn:
-            raise VmError('Invalid ssidref in config: %s' % exn)
+            log.info("Recreating domain %d with new UUID %s.", domid, uuid)
+
+            vm = cls(uuid, xeninfo, domid, True)
+            vm.storeVmDetails()
+            vm.storeDomDetails()
 
-        vm = cls(uuid, cls.parseConfig(config),
-                 xc.domain_create(ssidref = ssidref))
         vm.create_channel()
-        vm.configure()
-        vm.exportToDB()
-        vm.refreshShutdown()
+        if domid == 0:
+            vm.initStoreConnection()
+
+        vm.refreshShutdown(xeninfo)
         return vm
 
-    restore = classmethod(restore)
+    recreate = classmethod(recreate)
 
 
     def parseConfig(cls, config):
@@ -294,8 +307,6 @@ class XendDomainInfo:
         self.uuid = uuid
         self.info = info
 
-        self.path = DOMROOT + "/" + uuid
-
         if domid:
             self.domid = domid
         elif 'dom' in info:
@@ -303,6 +314,12 @@ class XendDomainInfo:
         else:
             self.domid = None
 
+        self.vmpath  = VMROOT + uuid
+        if self.domid is None:
+            self.dompath = None
+        else:
+            self.dompath = DOMROOT + str(self.domid)
+
         if augment:
             self.augmentInfo()
 
@@ -317,9 +334,7 @@ class XendDomainInfo:
 
         self.state = STATE_VM_OK
         self.state_updated = threading.Condition()
-
-        self.writeVm("uuid", self.uuid)
-        self.storeDom("vm", self.path)
+        self.refresh_shutdown_lock = threading.Condition()
 
 
     def augmentInfo(self):
@@ -332,14 +347,22 @@ class XendDomainInfo:
                 self.info[name] = val
 
         params = (("name", str),
-                  ("restart-mode", str),
+                  ("restart_mode", str),
                   ("image",        str),
-                  ("start-time", float))
+                  ("start_time", float))
 
         from_store = self.gatherVm(*params)
 
         map(lambda x, y: useIfNeeded(x[0], y), params, from_store)
 
+        device = []
+        for c in controllerClasses:
+            devconfig = self.getDeviceConfigurations(c)
+            if devconfig:
+                device.extend(map(lambda x: (c, x), devconfig))
+
+        useIfNeeded('device', device)
+
 
     def validateInfo(self):
         """Validate and normalise the info block.  This has either been parsed
@@ -377,7 +400,7 @@ class XendDomainInfo:
             # mem_kb.
 
             def discard_negatives(name):
-                if self.infoIsSet(name) and self.info[name] <= 0:
+                if self.infoIsSet(name) and self.info[name] < 0:
                     del self.info[name]
 
             def valid_KiB_(mb_name, kb_name):
@@ -403,7 +426,7 @@ class XendDomainInfo:
 
             def valid_KiB(mb_name, kb_name):
                 result = valid_KiB_(mb_name, kb_name)
-                if result <= 0:
+                if result is None or result < 0:
                     raise VmError('Invalid %s / %s: %s' %
                                   (mb_name, kb_name, result))
                 else:
@@ -452,42 +475,60 @@ class XendDomainInfo:
 
 
     def readVm(self, *args):
-        return xstransact.Read(self.path, *args)
+        return xstransact.Read(self.vmpath, *args)
 
     def writeVm(self, *args):
-        return xstransact.Write(self.path, *args)
+        return xstransact.Write(self.vmpath, *args)
 
     def removeVm(self, *args):
-        return xstransact.Remove(self.path, *args)
+        return xstransact.Remove(self.vmpath, *args)
 
     def gatherVm(self, *args):
-        return xstransact.Gather(self.path, *args)
+        return xstransact.Gather(self.vmpath, *args)
 
     def storeVm(self, *args):
-        return xstransact.Store(self.path, *args)
+        return xstransact.Store(self.vmpath, *args)
 
     def readDom(self, *args):
-        return xstransact.Read(self.path, *args)
+        return xstransact.Read(self.dompath, *args)
 
     def writeDom(self, *args):
-        return xstransact.Write(self.path, *args)
+        return xstransact.Write(self.dompath, *args)
 
     def removeDom(self, *args):
-        return xstransact.Remove(self.path, *args)
+        return xstransact.Remove(self.dompath, *args)
 
     def gatherDom(self, *args):
-        return xstransact.Gather(self.path, *args)
+        return xstransact.Gather(self.dompath, *args)
 
     def storeDom(self, *args):
-        return xstransact.Store(self.path, *args)
+        return xstransact.Store(self.dompath, *args)
 
 
-    def exportToDB(self):
+    def storeVmDetails(self):
         to_store = {
-            'domid':              str(self.domid),
             'uuid':               self.uuid,
 
-            'xend/restart_mode':  str(self.info['restart_mode']),
+            # !!!
+            'memory/target':      str(self.info['memory_KiB'])
+            }
+
+        if self.infoIsSet('image'):
+            to_store['image'] = sxp.to_string(self.info['image'])
+
+        for k in ['name', 'ssidref', 'restart_mode']:
+            if self.infoIsSet(k):
+                to_store[k] = str(self.info[k])
+
+        log.debug("Storing VM details: %s" % str(to_store))
+
+        self.writeVm(to_store)
+
+
+    def storeDomDetails(self):
+        to_store = {
+            'domid':              str(self.domid),
+            'vm':                 self.vmpath,
 
             'memory/target':      str(self.info['memory_KiB'])
             }
@@ -496,11 +537,9 @@ class XendDomainInfo:
             if v:
                 to_store[k] = str(v)
 
-        to_store['image'] = sxp.to_string(self.info['image'])
-
-        log.debug("Storing %s" % str(to_store))
+        log.debug("Storing domain details: %s" % str(to_store))
 
-        self.writeVm(to_store)
+        self.writeDom(to_store)
 
 
     def setDomid(self, domid):
@@ -522,8 +561,8 @@ class XendDomainInfo:
     def getName(self):
         return self.info['name']
 
-    def getPath(self):
-        return self.path
+    def getDomainPath(self):
+        return self.dompath
 
     def getUuid(self):
         return self.uuid
@@ -549,78 +588,97 @@ class XendDomainInfo:
 
 
     def refreshShutdown(self, xeninfo = None):
-        if xeninfo is None:
-            xeninfo = dom_get(self.domid)
+        # If set at the end of this method, a restart is required, with the
+        # given reason.  This restart has to be done out of the scope of
+        # refresh_shutdown_lock.
+        restart_reason = None
+        
+        self.refresh_shutdown_lock.acquire()
+        try:
             if xeninfo is None:
-                # The domain no longer exists.  This will occur if we have
-                # scheduled a timer to check for shutdown timeouts and the
-                # shutdown succeeded.
+                xeninfo = dom_get(self.domid)
+                if xeninfo is None:
+                    # The domain no longer exists.  This will occur if we have
+                    # scheduled a timer to check for shutdown timeouts and the
+                    # shutdown succeeded.  It will also occur if someone
+                    # destroys a domain beneath us.  We clean up, just in
+                    # case.
+                    self.cleanupDomain()
+                    self.cleanupVm()
+                    return
+
+            if xeninfo['dying']:
+                # Dying means that a domain has been destroyed, but has not
+                # yet been cleaned up by Xen.  This could persist indefinitely
+                # if, for example, another domain has some of its pages
+                # mapped.  We might like to diagnose this problem in the
+                # future, but for now all we do is make sure that it's not
+                # us holding the pages, by calling the cleanup methods.
+                self.cleanupDomain()
+                self.cleanupVm()
                 return
 
-        if xeninfo['dying']:
-            # Dying means that a domain has been destroyed, but has not yet
-            # been cleaned up by Xen.  This could persist indefinitely if,
-            # for example, another domain has some of its pages mapped.
-            # We might like to diagnose this problem in the future, but for
-            # now all we can sensibly do is ignore it.
-            pass
+            elif xeninfo['crashed']:
+                log.warn('Domain has crashed: name=%s id=%d.',
+                         self.info['name'], self.domid)
 
-        elif xeninfo['crashed']:
-            log.warn('Domain has crashed: name=%s id=%d.',
-                     self.info['name'], self.domid)
+                if xroot.get_enable_dump():
+                    self.dumpCore()
 
-            if xroot.get_enable_dump():
-                self.dumpCore()
+                restart_reason = 'crashed'
 
-            self.maybeRestart('crashed')
+            elif xeninfo['shutdown']:
+                reason = shutdown_reason(xeninfo['shutdown_reason'])
 
-        elif xeninfo['shutdown']:
-            reason = shutdown_reason(xeninfo['shutdown_reason'])
+                log.info('Domain has shutdown: name=%s id=%d reason=%s.',
+                         self.info['name'], self.domid, reason)
 
-            log.info('Domain has shutdown: name=%s id=%d reason=%s.',
-                     self.info['name'], self.domid, reason)
+                self.clearRestart()
 
-            self.clearRestart()
+                if reason == 'suspend':
+                    self.state_set(STATE_VM_SUSPENDED)
+                    # Don't destroy the domain.  XendCheckpoint will do this
+                    # once it has finished.
+                elif reason in ['poweroff', 'reboot']:
+                    restart_reason = reason
+                else:
+                    self.destroy()
 
-            if reason == 'suspend':
-                self.state_set(STATE_VM_SUSPENDED)
-                # Don't destroy the domain.  XendCheckpoint will do this once
-                # it has finished.
-            elif reason in ['poweroff', 'reboot']:
-                self.maybeRestart(reason)
             else:
-                self.destroy()
+                # Domain is alive.  If we are shutting it down, then check
+                # the timeout on that, and destroy it if necessary.
+
+                sst = self.readDom('xend/shutdown_start_time')
+                if sst:
+                    sst = float(sst)
+                    timeout = SHUTDOWN_TIMEOUT - time.time() + sst
+                    if timeout < 0:
+                        log.info(
+                            "Domain shutdown timeout expired: name=%s id=%s",
+                            self.info['name'], self.domid)
+                        self.destroy()
+                    else:
+                        log.debug(
+                            "Scheduling refreshShutdown on domain %d in %ds.",
+                            self.domid, timeout)
+                        scheduler.later(timeout, self.refreshShutdown)
+        finally:
+            self.refresh_shutdown_lock.release()
 
-        else:
-            # Domain is alive.  If we are shutting it down, then check
-            # the timeout on that, and destroy it if necessary.
-
-            sst = self.readVm('xend/shutdown_start_time')
-            if sst:
-                sst = float(sst)
-                timeout = SHUTDOWN_TIMEOUT - time.time() + sst
-                if timeout < 0:
-                    log.info(
-                        "Domain shutdown timeout expired: name=%s id=%s",
-                        self.info['name'], self.domid)
-                    self.destroy()
-                else:
-                    log.debug(
-                        "Scheduling refreshShutdown on domain %d in %ds.",
-                        self.domid, timeout)
-                    scheduler.later(timeout, self.refreshShutdown)
+        if restart_reason:
+            self.maybeRestart(restart_reason)
 
 
     def shutdown(self, reason):
         if not reason in shutdown_reasons.values():
             raise XendError('invalid reason:' + reason)
-        self.storeVm("control/shutdown", reason)
+        self.storeDom("control/shutdown", reason)
         if not reason == 'suspend':
-            self.storeVm('xend/shutdown_start_time', time.time())
+            self.storeDom('xend/shutdown_start_time', time.time())
 
 
     def clearRestart(self):
-        self.removeVm("xend/shutdown_start_time")
+        self.removeDom("xend/shutdown_start_time")
 
 
     def maybeRestart(self, reason):
@@ -647,12 +705,10 @@ class XendDomainInfo:
         """Close the given channel, if set, and remove the given entry in the
         store.  Nothrow guarantee."""
         
+        if channel:
+            channel.close()
         try:
-            try:
-                if channel:
-                    channel.close()
-            finally:
-                self.removeDom(entry)
+            self.removeDom(entry)
         except Exception, exn:
             log.exception(exn)
         
@@ -753,6 +809,10 @@ class XendDomainInfo:
 
     ## private:
 
+    def getDeviceConfigurations(self, deviceClass):
+        return self.getDeviceController(deviceClass).configurations()
+
+
     def getDeviceController(self, name):
         if name not in controllerClasses:
             raise XendError("unknown device type: " + str(name))
@@ -864,9 +924,8 @@ class XendDomainInfo:
 
 
     def construct(self):
-        """Construct the vm instance from its configuration.
+        """Construct the domain.
 
-        @param config: configuration
         @raise: VmError on error
         """
 
@@ -881,17 +940,18 @@ class XendDomainInfo:
                           self.info['name'])
 
         try:
+            self.dompath = DOMROOT + str(self.domid)
+
             self.initDomain()
             self.construct_image()
             self.configure()
-            self.exportToDB()
-        except Exception, ex:
-            # Catch errors, cleanup and re-raise.
-            print 'Domain construction error:', ex
-            import traceback
-            traceback.print_exc()
+            self.storeVmDetails()
+            self.storeDomDetails()
+        except Exception:
+            log.exception('Domain construction failed')
             self.destroy()
-            raise
+            raise VmError('Creating domain failed: name=%s' %
+                          self.info['name'])
 
 
     def initDomain(self):
@@ -926,38 +986,29 @@ class XendDomainInfo:
                   self.domid, self.info['name'], self.info['memory_KiB'])
 
 
-    def configure_vcpus(self, vcpus):
+    def configure_vcpus(self):
         d = {}
-        for v in range(0, vcpus):
+        for v in range(0, self.info['vcpus']):
             d["cpu/%d/availability" % v] = "online"
         self.writeVm(d)
 
+
     def construct_image(self):
         """Construct the boot image for the domain.
         """
         self.create_channel()
         self.image.createImage()
-        self.exportToDB()
-        if self.store_channel and self.store_mfn >= 0:
-            IntroduceDomain(self.domid, self.store_mfn,
-                            self.store_channel.port1, self.path)
-        # get the configured value of vcpus and update store
-        self.configure_vcpus(self.info['vcpus'])
+#        !!! self.exportToDB()
+        IntroduceDomain(self.domid, self.store_mfn,
+                        self.store_channel.port1, self.dompath)
+        self.configure_vcpus()
 
 
     ## public:
 
-    def delete(self):
-        """Delete the vm's db.
-        """
-        try:
-            xstransact.Remove(self.path, 'domid')
-        except Exception, ex:
-            log.warning("error in domain db delete: %s", ex)
-
-
-    def cleanup(self):
-        """Cleanup vm resources: release devices.  Nothrow guarantee."""
+    def cleanupDomain(self):
+        """Cleanup domain resources; release devices.  Idempotent.  Nothrow
+        guarantee."""
 
         self.state_set(STATE_VM_TERMINATED)
         self.release_devices()
@@ -972,24 +1023,29 @@ class XendDomainInfo:
                     "XendDomainInfo.cleanup: image.destroy() failed.")
             self.image = None
 
+        try:
+            self.removeDom()
+        except Exception:
+            log.exception("Removing domain path failed.")
 
-    def destroy(self):
-        """Cleanup vm and destroy domain.  Nothrow guarantee."""
-
-        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
 
-        self.cleanup()
+    def cleanupVm(self):
+        """Cleanup VM resources.  Idempotent.  Nothrow guarantee."""
 
         try:
             self.removeVm()
         except Exception:
             log.exception("Removing VM path failed.")
 
-        try:
-            self.removeDom()
-        except Exception:
-            log.exception("Removing domain path failed.")
 
+    def destroy(self):
+        """Cleanup VM and destroy domain.  Nothrow guarantee."""
+
+        log.debug("XendDomainInfo.destroy: domid=%s", str(self.domid))
+
+        self.cleanupDomain()
+        self.cleanupVm()
+        
         try:
             if self.domid is not None:
                 xc.domain_destroy(dom=self.domid)
@@ -1002,11 +1058,12 @@ class XendDomainInfo:
         """
         return self.state == STATE_VM_TERMINATED
 
+
     def release_devices(self):
-        """Release all vm devices.  Nothrow guarantee."""
+        """Release all domain's devices.  Nothrow guarantee."""
 
         while True:
-            t = xstransact("%s/device" % self.path)
+            t = xstransact("%s/device" % self.dompath)
             for n in controllerClasses.keys():
                 for d in t.list(n):
                     try:
@@ -1020,6 +1077,7 @@ class XendDomainInfo:
             if t.commit():
                 break
 
+
     def eventChannel(self, path=None):
         """Create an event channel to the domain.
         
@@ -1030,9 +1088,29 @@ class XendDomainInfo:
             try:
                 port = int(self.readDom(path))
             except:
-                # if anything goes wrong, assume the port was not yet set
+                # The port is not yet set, i.e. the channel has not yet been
+                # created.
                 pass
         ret = channel.eventChannel(0, self.domid, port1=port, port2=0)
+
+        # Stale port information from above causes an Invalid Argument to be
+        # thrown by the eventChannel call below.  To recover, we throw away
+        # port if it turns out to be bad, and just create a new channel.
+        # If creating a new channel with two new ports fails, then something
+        # else is going wrong, so we bail.
+        while True:
+            try:
+                ret = channel.eventChannel(0, self.domid, port1 = port,
+                                           port2 = 0)
+                break
+            except:
+                log.exception("Exception in eventChannel(0, %d, %d, %d)",
+                              self.domid, port, 0)
+                if port == 0:
+                    raise
+                else:
+                    port = 0
+                    log.error("Recovering from above exception.")
         self.storeDom(path, ret.port1)
         return ret
         
@@ -1113,10 +1191,11 @@ class XendDomainInfo:
         """Restart the domain after it has exited. """
 
         #            self.restart_check()
-        self.cleanup()
 
         config = self.sxpr()
 
+        self.cleanupDomain()
+
         if self.readVm('xend/restart_in_progress'):
             log.error('Xend failed during restart of domain %d.  '
                       'Refusing to restart to avoid loops.',
@@ -1188,26 +1267,23 @@ class XendDomainInfo:
         self.storeVm("cpu/%d/availability" % vcpu, availability)
 
     def send_sysrq(self, key=0):
-        self.storeVm("control/sysrq", '%c' % key)
+        self.storeDom("control/sysrq", '%c' % key)
 
-    def dom0_init_store(self):
-        if not self.store_channel:
-            self.store_channel = self.eventChannel("store/port")
-            if not self.store_channel:
-                return
+
+    def initStoreConnection(self):
         ref = xc.init_store(self.store_channel.port2)
         if ref and ref >= 0:
             self.setStoreRef(ref)
             try:
                 IntroduceDomain(self.domid, ref, self.store_channel.port1,
-                                self.path)
+                                self.dompath)
             except RuntimeError, ex:
                 if ex.args[0] == errno.EISCONN:
                     pass
                 else:
                     raise
-            # get run-time value of vcpus and update store
-            self.configure_vcpus(dom_get(self.domid)['vcpus'])
+        self.configure_vcpus()
+
 
     def dom0_enforce_vcpus(self):
         dom = 0
index 942efd46b4d245175af256c5f8f56ee8b31deb0f..0ede664ba09d824ad30b160da9b903b9d3fa2fbc 100644 (file)
@@ -219,7 +219,7 @@ class DevController:
     def backendPath(self, backdom, devid):
         """@param backdom [XendDomainInfo] The backend domain info."""
 
-        return "%s/backend/%s/%s/%d" % (backdom.getPath(),
+        return "%s/backend/%s/%s/%d" % (backdom.getDomainPath(),
                                         self.deviceClass,
                                         self.vm.getUuid(), devid)
 
@@ -229,9 +229,9 @@ class DevController:
 
 
     def frontendRoot(self):
-        return "%s/device/%s" % (self.vm.getPath(), self.deviceClass)
+        return "%s/device/%s" % (self.vm.getDomainPath(), self.deviceClass)
 
 
     def frontendMiscPath(self):
-        return "%s/device-misc/%s" % (self.vm.getPath(),
+        return "%s/device-misc/%s" % (self.vm.getDomainPath(),
                                       self.deviceClass)